level <- read.csv(here("data_4_analysis/WaterLevel_Cleaned.csv"))
level$DateTime <- as.POSIXct(as.character(level$DateTime),format = "%Y-%m-%d %H:%M")
df <- read.csv(here("data_4_analysis/All_Stream_Data.csv"))
Here I filter data only to times where both station 1 and 3 recorded level data. I then use linear regression to test the relationship between them.
stn1_2 <- level%>%
filter(Serial == "2020436" | Serial == "2020421")%>%
select(DateTime, LEVEL_m, Serial)
stn1_2_wide <- spread(stn1_2, Serial, LEVEL_m)
colnames(stn1_2_wide) <- c("DateTime","Station_3","Station_1")
stn1_2_complete <- na.omit(stn1_2_wide) # Keep only rows with both data points
lm <- lm(stn1_2_complete$Station_1~stn1_2_complete$Station_3)
summary(lm)
##
## Call:
## lm(formula = stn1_2_complete$Station_1 ~ stn1_2_complete$Station_3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.049037 -0.019029 -0.003185 0.014365 0.192749
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.047655 0.002324 -20.51 <2e-16 ***
## stn1_2_complete$Station_3 1.174443 0.011681 100.54 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02605 on 2845 degrees of freedom
## Multiple R-squared: 0.7804, Adjusted R-squared: 0.7803
## F-statistic: 1.011e+04 on 1 and 2845 DF, p-value: < 2.2e-16
Here I do this by filtering data to only those rows where station 1 is missing and station 3 is not. We will use the coefficient from the regression model to estimate level at station 1.
lvl4est <- stn1_2_wide%>%
filter(is.na(Station_1) & !is.na(Station_3))
lvl4est$Station_1 <- lvl4est$Station_3 - (lvl4est$Station_3 * 0.047655)
I do this by calling the orignal data and using a filter to create a dataset without the rows that were missing station 1 level data, and then I add in the new estimates from the previous step
levelMod <- stn1_2_wide%>%
filter(!is.na(Station_1))
levelMod <- rbind(levelMod,lvl4est)
p <- ggplot(levelMod)+
geom_point(aes(x = DateTime, y = Station_1), col="orange")+
geom_point(aes(x = DateTime, y = Station_3), col="#20bab2")
ggplotly(p)
We use level data from Station 1 for discharge measurements at Station 1 and 2. We use level data from Station 3 for discharge measurements at station 3 and 4.
discharge <- read.csv(here("data_4_analysis/recorded_discharge.csv"))
discharge$DateTime <-as.POSIXct(paste0(discharge$Date,"",discharge$Time),format = "%m/%d/%Y %H:%M")
discharge <- discharge%>%
select(DateTime,Stn,Level,Discharge)
ggplot(discharge)+
geom_point(aes(x = Discharge, y = Level, col = Stn), size = 3)
## Warning: Removed 8 rows containing missing values (geom_point).
Code is shown for Station 1 only, other stations use identical code.
stn1 <- discharge%>%
filter(Stn == "Station 1")%>%
na.omit()%>%
arrange(Discharge)
x<-stn1$Discharge
y<-stn1$Level
#for simple models nls find good starting values for the parameters even if it throw a warning
m<-nls(y~a*x/(b+x))
df$stn1_Q <- predict(m,data.frame(x=df$lvl_436_m))
plot(x,y)
lines(x,predict(m),lty=2,col="red",lwd=3)
write.csv(df,here("data_4_analysis/All_Stream_Data.csv"))
df.sub <- df%>%
select(DateTime,stn1_Q,stn2_Q,stn3_Q,stn4_Q)%>%
gather(Station,Discharge,-DateTime)
df.sub$DateTime <- as.POSIXct(df.sub$DateTime)
ggplot(df.sub)+
geom_point(aes(x=DateTime, y=Discharge, col=Station))
## Warning: Removed 139332 rows containing missing values (geom_point).